thresh = read_csv("./replication_file/_4_data/_thresholds_update.csv") 

pop <- read_csv('./replication_file/_4_data/census_stack_long_new_bc.csv') %>%
  group_by(city, YEAR) %>%
  summarise(pop=sum(num))

civil_reform = 
  read_csv("./replication_file/_4_data/cities_reform.csv") 

state_abbrev = 
  read_csv("./replication_file/_4_data/states.csv") 

rd_all <-
  thresh %>%
  mutate(min_pop = as.numeric(str_remove_all(min_pop, ','))) %>% 
  filter(coverage=='total' & pop_threshold==1 & optional==0) %>%
  select(-city) %>%
  filter(str_detect(specific_dept, 'civil_service') | is.na(specific_dept)) %>% 
  group_by(abbrev, state, min_pop, max_pop) %>%
  # minimum year
  summarise(rd_year = min(year)) %>%
  # adding in population and seeing if in threshold
  inner_join(pop %>%
               mutate(abbrev = str_trim(str_extract(city, '[a-z]{2}$')))) %>%
  mutate(time = rd_year < YEAR,
         treat = case_when(rd_year <= YEAR & pop >= min_pop & is.na(max_pop) ~ 1,
                           rd_year <= YEAR & is.na(min_pop) & pop <= max_pop ~ 1,
                           rd_year <= YEAR & pop >= min_pop & pop <= max_pop ~ 1,
                           T ~ 0),
         bandwidth = ifelse(!is.na(min_pop), pop - min_pop, max_pop - pop)) %>%
  #filter(city=='adams, ma') %>% View()
  group_by(city, YEAR) %>%
  filter(treat == max(treat)) %>%
  filter(abs(bandwidth)== min(abs(bandwidth))) %>%
  select(-min_pop, -max_pop) %>%
  rename(treat_assign = treat) %>%
  distinct() %>%
  # joining with existing civil service data to look at first stage
  full_join(civil_reform %>%
              left_join(state_abbrev) %>%
              mutate(city = paste0(str_to_lower(str_trim(City)), ', ', str_to_lower(str_trim(Abbreviation)))) %>%
              filter(!(police_only==0 & fire_only==1)) %>%
              group_by(city) %>%
              summarise(actual_year = min(Year))) %>%
  mutate(treat_actual = ifelse(YEAR >= actual_year, 1, 0)) %>%
  full_join(analysis_df_3 %>%
              filter(occ=='blue_collar')) %>%
  full_join(analysis_df_4 %>%
              filter(occ=='blue_collar')) %>%
  full_join(analysis_df_3 %>%
              filter(occ=='white_collar') %>%
              rename_at(vars(govt_occ_total:white_x_native_born), function(x){paste0(x, '_wc')}) %>%
              select(city, YEAR, govt_occ_total_wc:white_x_native_born_wc)) %>%
  full_join(analysis_df_4 %>%
              filter(occ=='white_collar') %>%
              rename_at(vars(govt_german:govt_russian, german:russian), function(x){paste0(x, '_wc')}) %>%
              select(city, YEAR, govt_german_wc:govt_russian_wc, german_wc:russian_wc))

# making figure 5
rd_plot_main <-
  rd_all %>%
  filter(YEAR - rd_year > 0) %>%
  mutate(native_diff = govt_white_x_native_born - white_x_native_born, 
         foreign_diff = govt_white_x_foreign_born - white_x_foreign_born, 
         black_diff = govt_black_x_native_born - black_x_native_born, 
         native_diff_wc = govt_white_x_native_born_wc - white_x_native_born_wc, 
         foreign_diff_wc = govt_white_x_foreign_born_wc - white_x_foreign_born_wc, 
         black_diff_wc = govt_black_x_native_born_wc - black_x_native_born_wc, 
         side = ifelse(bandwidth>=0, '1', '0'),
         time = case_when(rd_year > YEAR ~ "1. Before",
                          TRUE ~ "2. After")) %>%
  pivot_longer(cols = c(foreign_diff, black_diff, native_diff, foreign_diff_wc, native_diff_wc, black_diff_wc)) %>%
  mutate(group = case_when(str_detect(name, 'foreign') ~ 'Foreign-Born Whites',
                           str_detect(name, 'black') ~ 'Native-Born Blacks',
                           str_detect(name, 'native') ~ 'Native-Born Whites'),
         occ = case_when(str_detect(name, '_wc') ~ "White Collar",
                                    T ~ "Blue Collar")) %>%
  filter(abs(bandwidth)<15000) %>%
  ggplot(., aes(x = bandwidth, y = value, group=side)) + 
  geom_point(alpha=.05) + 
  geom_smooth(method='lm', col = 'black') + 
  stat_summary_bin(fun='mean', bins=20,
                   color='red', size=1, geom='point', alpha = 0.8) +
  geom_vline(xintercept=0, lty=2) + 
  facet_grid(group~occ) + 
  coord_cartesian(xlim=c(-15000, 15000), ylim=c(-.25, .25)) + 
  labs(x = 'Distance from Theshold (Population)', 
       y = 'Overrepresentation of Group\nin Municipal Civil Service') + 
  theme_bw() +
  theme(panel.grid.minor = element_blank(), 
        panel.grid.major.x = element_blank(),
        axis.line.y.left = element_blank(),
        legend.position = "none",
        axis.ticks.y = element_blank(),
        strip.background = element_blank(),
        legend.title = element_blank(),
        axis.line = element_line(colour = "black"),
        panel.border = element_blank())
ggsave('./replication_file/_5_outputs/figures/figure_5.png', plot = rd_plot_main, width=11, height=8)

# tabular results
rhs_data = rd_all %>%
  filter(time==T)

rd_fuzzy <-
  rdrobust(y = unlist(rhs_data[,'govt_white_x_foreign_born']),
         x = rhs_data$bandwidth,
         c=0,
         fuzzy = rhs_data$treat_assign,
         covs = rhs_data[,c('white_x_foreign_born')],
         cluster = rhs_data$city)
summary(rd_fuzzy)

out <- NULL
for(i in c('white_x_foreign_born', 'white_x_native_born', 'black_x_native_born',
           'white_x_foreign_born_wc', 'white_x_native_born_wc', 'black_x_native_born_wc')){
  rdrobust(y = unlist(rhs_data[,paste0('govt_', i)]),
           x = rhs_data$bandwidth,
           c=0,
           fuzzy = rhs_data$treat_assign,
           covs = unlist(rhs_data[,i]),
           cluster = rhs_data$city) -> rd_fuzzy
  out <- rbind(out, c(i, rd_fuzzy$Estimate[c(1, 3, 4)], rd_fuzzy$bws[1,c(1)], sum(rd_fuzzy$N), sum(rd_fuzzy$N_h)))
}

# making table A2
rd_results <- out %>%
  as.data.frame() %>%
  rename(Group = 1, `Estimate` = 2, `SE (Conv.)`=3, `SE (Robust)`=4, `BW`=5, `N`=6, , `Eff. N`=7) %>%
  mutate_at(vars(-Group), as.numeric) %>%
  mutate(Occupation = case_when(str_detect(Group, '_wc') ~ 'White Collar',
                                T ~ 'Blue Collar'),
         Group = case_when(str_detect(Group, 'foreign') ~ 'Foreign W.',
                           str_detect(Group, 'black') ~ 'Native B.',
                           str_detect(Group, 'native') ~ 'Native W.')) %>%
  select(Group, Occupation, Estimate:`Eff. N`)

# nativity
out <- NULL
for(i in c('german', 'irish', 'italian', 'polish', 'russian',
           'german_wc', 'irish_wc', 'italian_wc', 'polish_wc', 'russian_wc')){
  rdrobust(y = unlist(rhs_data[,paste0('govt_', i)]),
           x = rhs_data$bandwidth,
           c=0,
           fuzzy = rhs_data$treat_assign,
           covs = unlist(rhs_data[,i]),
           cluster = rhs_data$city) -> rd_fuzzy
  out <- rbind(out, c(i, rd_fuzzy$Estimate[c(1, 3, 4)], rd_fuzzy$bws[1,c(1)], sum(rd_fuzzy$N), sum(rd_fuzzy$N_h)))
}

# making table A3
rd_nativity <- out %>%
  as.data.frame() %>%
  rename(Group = 1, `Estimate` = 2, `SE (Conv.)`=3, `SE (Robust)`=4, `BW`=5, `N`=6, , `Eff. N`=7) %>%
  mutate_at(vars(-Group), as.numeric) %>%
  mutate(Occupation = case_when(str_detect(Group, '_wc') ~ 'White Collar',
                                T ~ 'Blue Collar'),
         Group = case_when(str_detect(Group, 'german') ~ 'German',
                           str_detect(Group, 'irish') ~ 'Irish',
                           str_detect(Group, 'italian') ~ 'Italian',
                           str_detect(Group, 'polish') ~ 'Polish',
                           str_detect(Group, 'russian') ~ 'Russian'))%>%
  select(Group, Occupation, Estimate:`Eff. N`)

cat(stargazer(rd_results, summary=F, rownames=F), file = './replication_file/_5_outputs/tables/rd_results.tex')
cat(stargazer(rd_nativity, summary=F, rownames=F), file = './replication_file/_5_outputs/tables/rd_results_nativity.tex')
  
cat(stargazer(rd_results, summary=F, rownames=F), file = './replication_file/_5_outputs/tables/table_a3_1.tex')
cat(stargazer(rd_nativity, summary=F, rownames=F), file = './replication_file/_5_outputs/tables/table_a3_2.tex')


